Loading required libraries.

# Attach packages for data wrangling, time-series modeling and plotting.
library(tidyverse)
library(ggplot2)   # already attached by tidyverse; re-attaching is harmless
library(forecast)  # ets(), hw(), Arima(), auto.arima(), tslm(), accuracy()
library(fpp2)
library(fma)
library(lmtest)
library(dbplyr)    # NOTE(review): dbplyr is the database backend -- presumably dplyr was intended (tidyverse already attaches dplyr); confirm
library(lubridate)
library(seasonal)
library(urca)      # ur.kpss()
library(tseries)   # kpss.test(), adf.test()
library(gridExtra)
# Read the calendar-factors dataset and pull the 'sales' column out as a plain numeric vector.
food<- read_csv('../data/calendar_factors.csv')
food.252 = as.vector(unlist(food['sales']))

Exploratory Data Analysis (EDA)

# frequency m = 365
# Annual-frequency view: day 29 of 2011 through day 144 of 2016 (1941 daily observations).
foods_sale = ts(food.252, start=c(2011,29), end = c(2016,144), frequency = 365) # 1941 days 
foods_train = window(foods_sale, start=c(2011,29), end = c(2016,116) ) # 1913 days 1-1913
foods_test = window(foods_sale, start=c(2016,117), end = c(2016,144) ) # 28 days 1914-1941
# frequency m = 7
# Weekly-frequency view of the same vector; the time index counts weeks, and the
# train/test split below matches the 1913/28-day split above.
daily_ts = ts(food.252, start=c(1,7), end = c(279,1), frequency = 7) # 1-1941: 2011-1-29 ~ 2016-5-22
daily_train = subset(daily_ts, end = 1913) # training set: 1-1913, 2011-1-29 ~ 2016-4-25 c(1,7)
daily_test = subset(daily_ts, start = 1914) # test set: 1913-1941, 2016-4-26 ~ 2016-5-22 c(275,2)
# m=365
# Time plot of the annual-frequency training window.
# NOTE(review): the ts definition above starts at 2011-01-29, but this title says
# 01/19/2011 and the end date also disagrees with the c(2016,116) window -- the
# title dates look transposed/stale; confirm and correct against the data.
autoplot(foods_train) + ggtitle("Daily unit sales (01/19/2011 - 05/12/2016)") + ylab("Figure.1 Daily unit sales") + xlab("Year")

# Classical additive decomposition (observed = trend + seasonal + remainder).
# Fixed typo in figure title: "decomposistion" -> "decomposition".
autoplot(decompose(foods_train, type='additive')) + ggtitle("Figure.2 Additive decomposition") + xlab("Year") + ylab("Component") # additive decomposition

# Classical multiplicative decomposition; the type is spelled out in full rather
# than relying on partial argument matching of 'mult' (same behavior, clearer).
autoplot(decompose(foods_train, type='multiplicative')) + ggtitle("Multiplicative decomposition") + xlab("Year") + ylab("Component") # multiplicative decomposition

hist(foods_train, main = 'Distribution of daily unit sales', xlab='Daily unit sale')

# m=7
autoplot(daily_train) + ggtitle("Unit sale time plot")

autoplot(decompose(daily_train, type='additive')) # additive decomposition

autoplot(decompose(daily_train, type='multiplicative')) # multiplicative decomposition

hist(daily_train)

# Seasonally differenced series: lag-7 differencing removes the weekly pattern.
autoplot(diff(daily_train, lag=7))

ggsubseriesplot(daily_train) + ggtitle("Unit sales during a week") # unit sale is higher on weekends

summary(foods_train)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   4.00   26.00   36.00   39.34   49.00  147.00 
# m=365
# Sample ACF of the training series: slow decay plus strong spikes every 7 lags
# indicate both non-stationarity and weekly seasonality.
print(Acf(foods_train))  # non-stationary, as r1 is large and positive, and the ACF drops to zero slowly

Autocorrelations of series ‘foods_train’, by lag

     0      1      2      3      4      5      6      7      8      9     10     11     12     13     14     15     16     17 
 1.000  0.567  0.190  0.013  0.009  0.156  0.466  0.660  0.445  0.124 -0.024 -0.025  0.130  0.435  0.625  0.427  0.117 -0.034 
    18     19     20     21     22     23     24     25     26     27     28     29     30     31     32     33     34     35 
-0.047  0.090  0.403  0.599  0.410  0.104 -0.044 -0.050  0.094  0.403  0.596  0.384  0.081 -0.066 -0.080  0.073  0.386  0.569 
    36     37     38     39     40     41     42     43     44     45     46     47     48     49     50     51     52     53 
 0.356  0.058 -0.088 -0.100  0.043  0.348  0.544  0.356  0.050 -0.106 -0.121  0.032  0.333  0.541  0.354  0.042 -0.104 -0.108 
    54     55     56     57     58     59     60     61     62     63     64     65     66     67     68     69     70     71 
 0.035  0.321  0.501  0.321  0.032 -0.120 -0.121  0.019  0.312  0.478  0.298  0.024 -0.136 -0.156 -0.022  0.274  0.462  0.286 
    72     73     74     75     76     77     78     79     80     81     82     83     84     85     86     87     88     89 
 0.002 -0.164 -0.167 -0.024  0.272  0.458  0.277 -0.013 -0.159 -0.164 -0.043  0.246  0.427  0.250 -0.046 -0.185 -0.202 -0.057 
    90     91     92     93     94     95     96     97     98     99    100    101    102    103    104    105    106    107 
 0.218  0.411  0.232 -0.044 -0.191 -0.190 -0.060  0.214  0.399  0.221 -0.041 -0.183 -0.188 -0.060  0.213  0.403  0.233 -0.042 
   108    109    110    111    112    113    114    115    116    117    118    119    120    121    122    123    124    125 
-0.196 -0.196 -0.082  0.191  0.379  0.211 -0.074 -0.215 -0.229 -0.096  0.181  0.376  0.200 -0.071 -0.225 -0.236 -0.103  0.194 
   126    127    128    129    130    131    132    133    134    135    136    137    138    139    140    141    142    143 
 0.362  0.186 -0.085 -0.220 -0.217 -0.090  0.178  0.349  0.190 -0.094 -0.231 -0.247 -0.119  0.147  0.316  0.156 -0.123 -0.268 
   144    145    146    147    148    149    150    151    152    153    154    155    156    157    158    159    160    161 
-0.272 -0.139  0.135  0.316  0.135 -0.141 -0.282 -0.275 -0.147  0.127  0.316  0.137 -0.141 -0.276 -0.284 -0.137  0.131  0.300 
   162    163    164    165    166    167    168    169    170    171    172    173    174    175    176    177    178    179 
 0.131 -0.150 -0.299 -0.292 -0.151  0.128  0.305  0.138 -0.145 -0.280 -0.277 -0.150  0.124  0.292  0.120 -0.142 -0.283 -0.275 
   180    181    182    183    184    185    186    187    188    189    190    191    192    193    194    195    196    197 
-0.142  0.123  0.295  0.109 -0.158 -0.292 -0.290 -0.149  0.121  0.283  0.113 -0.159 -0.284 -0.286 -0.143  0.117  0.291  0.104 
   198    199    200    201    202    203    204    205    206    207    208    209    210    211    212    213    214    215 
-0.176 -0.304 -0.296 -0.164  0.113  0.280  0.104 -0.164 -0.286 -0.290 -0.146  0.114  0.292  0.118 -0.151 -0.282 -0.286 -0.147 
   216    217    218    219    220    221    222    223    224    225    226    227    228    229    230    231    232    233 
 0.129  0.292  0.112 -0.151 -0.274 -0.263 -0.113  0.147  0.311  0.129 -0.121 -0.242 -0.234 -0.093  0.174  0.336  0.165 -0.104 
   234    235    236    237    238    239    240    241    242    243    244    245    246    247    248    249    250    251 
-0.214 -0.209 -0.076  0.175  0.338  0.162 -0.103 -0.229 -0.222 -0.082  0.176  0.331  0.169 -0.092 -0.224 -0.212 -0.073  0.187 
   252    253    254    255    256    257    258    259    260    261    262    263    264    265    266    267    268    269 
 0.342  0.173 -0.086 -0.200 -0.181 -0.044  0.201  0.350  0.194 -0.059 -0.172 -0.180 -0.055  0.195  0.355  0.190 -0.067 -0.186 
   270    271    272    273    274    275    276    277    278    279    280    281    282    283    284    285    286    287 
-0.175 -0.046  0.205  0.356  0.187 -0.054 -0.199 -0.180 -0.048  0.218  0.372  0.207 -0.049 -0.170 -0.153 -0.019  0.226  0.388 
   288    289    290    291    292    293    294    295    296    297    298    299    300    301    302    303    304    305 
 0.224 -0.042 -0.161 -0.158 -0.017  0.239  0.389  0.232 -0.029 -0.157 -0.160 -0.027  0.238  0.396  0.236 -0.029 -0.149 -0.119 
   306    307    308    309    310    311    312    313    314    315    316    317    318    319    320    321    322    323 
 0.006  0.255  0.416  0.248 -0.010 -0.125 -0.111  0.015  0.274  0.436  0.254 -0.006 -0.123 -0.124  0.010  0.272  0.449  0.263 
   324    325    326    327    328    329    330    331    332    333    334    335    336    337    338    339    340    341 
 0.008 -0.126 -0.124  0.020  0.287  0.443  0.268  0.018 -0.104 -0.095  0.031  0.298  0.471  0.297  0.026 -0.105 -0.089  0.031 
   342    343    344    345    346    347    348    349    350    351    352    353    354    355    356    357    358    359 
 0.297  0.451  0.284  0.020 -0.101 -0.092  0.033  0.287  0.442  0.280  0.022 -0.106 -0.107  0.029  0.294  0.465  0.287  0.033 
   360    361    362    363    364    365    366    367    368    369    370    371    372    373    374    375    376    377 
-0.089 -0.090  0.065  0.334  0.528  0.332  0.054 -0.075 -0.086  0.042  0.304  0.469  0.292  0.023 -0.108 -0.105  0.026  0.287 
   378    379    380    381    382    383    384    385    386    387    388    389    390    391    392    393    394    395 
 0.435  0.272  0.009 -0.115 -0.119  0.002  0.258  0.430  0.269  0.019 -0.120 -0.119  0.009  0.259  0.423  0.251 -0.003 -0.119 
   396    397    398    399    400    401    402    403    404    405    406    407    408    409    410    411    412    413 
-0.115  0.000  0.253  0.390  0.225 -0.021 -0.135 -0.136 -0.027  0.237  0.395  0.233 -0.026 -0.134 -0.143 -0.030  0.214  0.392 
   414    415    416    417    418    419    420    421    422    423    424    425    426    427    428    429    430    431 
 0.242 -0.020 -0.126 -0.137 -0.025  0.214  0.362  0.209 -0.030 -0.151 -0.144 -0.037  0.200  0.341  0.179 -0.043 -0.173 -0.178 
   432    433    434    435    436    437    438    439    440    441    442    443    444    445    446    447    448    449 
-0.068  0.163  0.320  0.170 -0.069 -0.186 -0.187 -0.071  0.175  0.323  0.161 -0.071 -0.187 -0.189 -0.082  0.167  0.307  0.157 
   450    451    452    453    454    455    456    457    458    459    460    461    462    463    464    465    466    467 
-0.082 -0.194 -0.202 -0.092  0.135  0.286  0.147 -0.070 -0.192 -0.189 -0.095  0.120  0.280  0.139 -0.091 -0.197 -0.194 -0.095 
   468    469    470    471    472    473    474    475    476    477    478    479    480    481    482    483    484    485 
 0.127  0.283  0.131 -0.095 -0.203 -0.198 -0.093  0.121  0.263  0.119 -0.104 -0.216 -0.229 -0.115  0.115  0.263  0.115 -0.118 
   486    487    488    489    490    491    492    493    494    495    496    497    498    499    500    501    502    503 
-0.233 -0.238 -0.138  0.104  0.246  0.107 -0.119 -0.227 -0.220 -0.128  0.100  0.233  0.089 -0.133 -0.240 -0.246 -0.151  0.072 
   504    505    506    507    508    509    510    511    512    513    514    515    516    517    518    519    520    521 
 0.210  0.069 -0.158 -0.265 -0.277 -0.174  0.038  0.178  0.032 -0.193 -0.301 -0.291 -0.172  0.053  0.184  0.038 -0.197 -0.301 
   522    523    524    525    526    527    528    529    530    531    532    533    534    535    536    537    538    539 
-0.297 -0.192  0.030  0.164  0.023 -0.194 -0.305 -0.312 -0.193  0.034  0.170  0.015 -0.198 -0.304 -0.295 -0.191  0.033  0.167 
   540    541    542    543    544    545    546    547    548    549    550    551    552    553    554    555    556    557 
 0.021 -0.193 -0.305 -0.300 -0.183  0.019  0.161  0.018 -0.203 -0.305 -0.297 -0.190  0.034  0.165  0.035 -0.192 -0.296 -0.288 
   558    559    560    561    562    563    564    565    566    567    568    569    570    571    572    573    574    575 
-0.170  0.032  0.170  0.024 -0.190 -0.291 -0.283 -0.174  0.034  0.165  0.035 -0.179 -0.287 -0.282 -0.169  0.035  0.175  0.044 
   576    577    578    579    580    581    582    583    584    585    586    587    588    589    590    591    592    593 
-0.176 -0.283 -0.279 -0.157  0.042  0.183  0.046 -0.161 -0.257 -0.249 -0.137  0.072  0.193  0.060 -0.151 -0.239 -0.233 -0.127 
   594    595    596    597    598    599    600    601    602    603    604    605    606    607    608    609    610    611 
 0.083  0.204  0.078 -0.136 -0.214 -0.210 -0.110  0.095  0.215  0.091 -0.123 -0.222 -0.213 -0.114  0.089  0.214  0.085 -0.122 
   612    613    614    615    616    617    618    619    620    621    622    623    624    625    626    627    628    629 
-0.214 -0.204 -0.104  0.094  0.224  0.094 -0.111 -0.195 -0.199 -0.093  0.103  0.217  0.098 -0.093 -0.187 -0.187 -0.091  0.105 
   630    631    632    633    634    635    636    637    638    639    640    641    642    643    644    645    646    647 
 0.231  0.089 -0.101 -0.194 -0.190 -0.091  0.108  0.236  0.095 -0.096 -0.198 -0.187 -0.095  0.110  0.227  0.097 -0.098 -0.182 
   648    649    650    651    652    653    654    655    656    657    658    659    660    661    662    663    664    665 
-0.175 -0.081  0.117  0.240  0.116 -0.087 -0.185 -0.176 -0.079  0.117  0.241  0.115 -0.086 -0.188 -0.171 -0.066  0.136  0.254 
   666    667    668    669    670    671    672    673    674    675    676    677    678    679    680    681    682    683 
 0.117 -0.077 -0.173 -0.156 -0.052  0.141  0.249  0.119 -0.077 -0.155 -0.149 -0.053  0.137  0.259  0.120 -0.065 -0.154 -0.140 
   684    685    686    687    688    689    690    691    692    693    694    695    696    697    698    699    700    701 
-0.038  0.154  0.272  0.144 -0.049 -0.145 -0.142 -0.037  0.148  0.268  0.142 -0.054 -0.142 -0.130 -0.026  0.165  0.290  0.165 
   702    703    704    705    706    707    708    709    710    711    712    713    714    715    716    717    718    719 
-0.040 -0.131 -0.120 -0.020  0.180  0.301  0.175 -0.021 -0.113 -0.101  0.003  0.185  0.290  0.161 -0.036 -0.123 -0.103  0.003 
   720    721    722    723    724    725    726    727    728    729    730 
 0.190  0.304  0.176 -0.019 -0.101 -0.088  0.020  0.213  0.347  0.206  0.012 

summary(ur.kpss(foods_train)) # the test-statistic 3.01 is much larger than the 1pct 0.74, which means that the data is not stationary

####################### 
# KPSS Unit Root Test # 
####################### 

Test is of type: mu with 8 lags. 

Value of test-statistic is: 3.0088 

Critical value for a significance level of: 
                10pct  5pct 2.5pct  1pct
critical values 0.347 0.463  0.574 0.739
summary(ur.kpss(diff(foods_train))) 

####################### 
# KPSS Unit Root Test # 
####################### 

Test is of type: mu with 8 lags. 

Value of test-statistic is: 0.003 

Critical value for a significance level of: 
                10pct  5pct 2.5pct  1pct
critical values 0.347 0.463  0.574 0.739
kpss.test(foods_train) # 0.01
p-value smaller than printed p-value

    KPSS Test for Level Stationarity

data:  foods_train
KPSS Level = 3.0088, Truncation lag parameter = 8, p-value = 0.01
adf.test(foods_train) # 0.01
p-value smaller than printed p-value

    Augmented Dickey-Fuller Test

data:  foods_train
Dickey-Fuller = -5.5278, Lag order = 12, p-value = 0.01
alternative hypothesis: stationary
Box.test(foods_train) # p-value is tiny, reject the null hypothesis that there's no autocorrelation.

    Box-Pierce test

data:  foods_train
X-squared = 614.31, df = 1, p-value < 2.2e-16
Box.test(foods_train,type='Lj') # same as Box-Pierce test

    Box-Ljung test

data:  foods_train
X-squared = 615.28, df = 1, p-value < 2.2e-16
BoxCox.lambda(foods_train) # 0.31
[1] 0.3054523
# m=7
BoxCox.lambda(daily_ts) # 0.204
[1] 0.2042021
BoxCox.lambda(daily_train) # 0.204
[1] 0.2039999
nsdiffs(foods_train) # 0: seasonal differencing is not required 0
[1] 0
ndiffs(foods_train) # 1:  This process of using a sequence of KPSS tests to determine the appropriate number of first differences is carried out by the function ndiffs().As we saw from the KPSS tests above, one difference is required to make the sale data stationary.
[1] 1
nsdiffs(daily_train) # 1
[1] 1
ndiffs(diff(daily_train, lag = 7)) # 0
[1] 0

Linear regression model

# Data-frame train/test splits aligned with the ts splits above (1913/28 rows).
food_train_df = food[1:1913,]
food_test_df = food[1914:1941,]
food_train_df
# Pairwise correlations among the calendar predictors (selected columns);
# near-zero values suggest little multicollinearity among them.
# NOTE: 'cultrural_days' spelling matches the column name in the CSV.
round(cor(food_train_df[, c(5,6,7,9,10,11,12)]),2)
                wday month  year sporting_days cultrural_days national_days religious_days
wday            1.00  0.00  0.00         -0.02          -0.07         -0.01           0.00
month           0.00  1.00 -0.16         -0.05          -0.06          0.02          -0.06
year            0.00 -0.16  1.00          0.00           0.00          0.00           0.01
sporting_days  -0.02 -0.05  0.00          1.00           0.03         -0.02          -0.02
cultrural_days -0.07 -0.06  0.00          0.03           1.00         -0.02           0.04
national_days  -0.01  0.02  0.00         -0.02          -0.02          1.00          -0.03
religious_days  0.00 -0.06  0.01         -0.02           0.04         -0.03           1.00

lm()

# Linear model: one-day lagged sales plus weekday/month/year dummies and
# event-day indicators.
# NOTE(review): lag() here presumably resolves to dplyr::lag (a row-wise shift)
# because dplyr masks stats::lag -- confirm dplyr is attached; stats::lag would
# silently leave sales unshifted inside lm().
fit.lm1 = lm(formula = sales ~ lag(sales) + as.factor(wday) + as.factor(month)+ as.factor(year) + sporting_days + cultrural_days + national_days + religious_days, data = food_train_df)
summary(fit.lm1)

Call:
lm(formula = sales ~ lag(sales) + as.factor(wday) + as.factor(month) + 
    as.factor(year) + sporting_days + cultrural_days + national_days + 
    religious_days, data = food_train_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-30.859  -6.519  -0.958   5.966  65.517 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)          25.27999    1.27000  19.906  < 2e-16 ***
lag(sales)            0.36092    0.02103  17.160  < 2e-16 ***
as.factor(wday)2     -6.11636    0.97134  -6.297 3.77e-10 ***
as.factor(wday)3    -30.97754    0.96674 -32.043  < 2e-16 ***
as.factor(wday)4    -24.85916    0.92741 -26.805  < 2e-16 ***
as.factor(wday)5    -23.05349    0.94646 -24.358  < 2e-16 ***
as.factor(wday)6    -21.15682    0.94176 -22.465  < 2e-16 ***
as.factor(wday)7    -12.45409    0.92970 -13.396  < 2e-16 ***
as.factor(month)2     3.53303    1.18913   2.971 0.003005 ** 
as.factor(month)3     5.33709    1.17050   4.560 5.45e-06 ***
as.factor(month)4     8.18739    1.20350   6.803 1.37e-11 ***
as.factor(month)5     9.24058    1.26244   7.320 3.66e-13 ***
as.factor(month)6    14.00795    1.31561  10.648  < 2e-16 ***
as.factor(month)7    15.20330    1.32126  11.507  < 2e-16 ***
as.factor(month)8    19.38193    1.37246  14.122  < 2e-16 ***
as.factor(month)9    12.45209    1.30767   9.522  < 2e-16 ***
as.factor(month)10   10.49660    1.27215   8.251 2.91e-16 ***
as.factor(month)11    9.07927    1.27090   7.144 1.29e-12 ***
as.factor(month)12   10.71883    1.27199   8.427  < 2e-16 ***
as.factor(year)2012   6.32791    0.83049   7.619 4.01e-14 ***
as.factor(year)2013  10.47356    0.87571  11.960  < 2e-16 ***
as.factor(year)2014   9.60922    0.86520  11.106  < 2e-16 ***
as.factor(year)2015   7.52883    0.84285   8.933  < 2e-16 ***
as.factor(year)2016   8.34559    1.24682   6.693 2.87e-11 ***
sporting_days       -10.58217    2.73803  -3.865 0.000115 ***
cultrural_days       -5.81321    1.78623  -3.254 0.001156 ** 
national_days        10.28531    1.56108   6.589 5.75e-11 ***
religious_days        4.24155    1.49442   2.838 0.004585 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 10.61 on 1884 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.6795,    Adjusted R-squared:  0.6749 
F-statistic: 147.9 on 27 and 1884 DF,  p-value: < 2.2e-16
checkresiduals(fit.lm1)

    Breusch-Godfrey test for serial correlation of order up to 31

data:  Residuals
LM test = 89.077, df = 31, p-value = 1.599e-07

CV(fit.lm1)
          CV          AIC         AICc          BIC        AdjR2 
 114.2756442 9059.7334945 9060.6580429 9220.8547422    0.6748823 
# Overlay fitted values on the training series.
# NOTE(review): predict.lm(fit.lm1) has 1912 values (one row dropped for the
# lag-induced NA) but the ts() wrapper starts it at index 1 -- the overlay is
# presumably offset by one day relative to daily_train; confirm alignment.
autoplot(daily_train, series='Observations') +
  autolayer(ts(predict.lm(fit.lm1), frequency = 7), series = 'Linear regression') +
  ylab("Unit sales") +
  xlab("Week") + 
  ggtitle("Daily unit sales (linear regression model)")

# Out-of-sample predictions, explicitly started at the test window c(275,2).
autoplot(daily_test, series = 'Observations') + 
  autolayer(ts(predict.lm(fit.lm1, food_test_df), frequency = 7, start = c(275,2) ), series = 'Linear regression') +
  ylab("Unit sales") +
  xlab("Week") + 
  ggtitle("Predicted daily unit sales (linear regression model)")

tslm()

# Box-Cox lambda is computed here but NOT applied to fit.tslm1 below; it is
# reused later for the transformed ETS/HW fits.
lambda = BoxCox.lambda(daily_train)
# Time-series linear model with linear trend and day-of-week dummies.
fit.tslm1 = tslm(daily_train ~ trend + season)
summary(fit.tslm1)

Call:
tslm(formula = daily_train ~ trend + season)

Residuals:
    Min      1Q  Median      3Q     Max 
-36.834  -9.540  -2.207   7.454  90.456 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  5.011e+01  1.043e+00  48.047   <2e-16 ***
trend        5.909e-03  5.976e-04   9.889   <2e-16 ***
season2     -2.365e+01  1.234e+00 -19.161   <2e-16 ***
season3     -2.708e+01  1.234e+00 -21.943   <2e-16 ***
season4     -2.617e+01  1.234e+00 -21.203   <2e-16 ***
season5     -2.425e+01  1.234e+00 -19.650   <2e-16 ***
season6     -1.481e+01  1.234e+00 -11.998   <2e-16 ***
season7      8.453e-01  1.233e+00   0.686    0.493    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 14.43 on 1905 degrees of freedom
Multiple R-squared:  0.4001,    Adjusted R-squared:  0.3979 
F-statistic: 181.5 on 7 and 1905 DF,  p-value: < 2.2e-16
checkresiduals(fit.tslm1)

    Breusch-Godfrey test for serial correlation of order up to 14

data:  Residuals from Linear regression model
LM test = 820.75, df = 14, p-value < 2.2e-16

CV(fit.tslm1) # 1.022361e+04
          CV          AIC         AICc          BIC        AdjR2 
2.091685e+02 1.022361e+04 1.022371e+04 1.027362e+04 3.979282e-01 
# Zoom on weeks 50-100 to compare fitted vs observed values.
# (xlim() triggers a "Scale for 'x' is already present" message -- expected.)
autoplot(daily_train, series = 'Observations') +
  autolayer(fitted(fit.tslm1), series = 'fitted') +
  xlim(50, 100)
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.

  
# Forecast 28 days ahead with the trend+season model and overlay on the test set.
# Fixed typos in the axis label ("Predited") and title ("regressiom").
autoplot(daily_test, series = 'Observations') +
  autolayer(forecast(fit.tslm1, h=28), PI = F, series = 'linear regression model') + 
  ylab("Predicted unit sales") +
  xlab("Week") + 
  ggtitle("Predicted daily unit sales (linear regression model)")

tslm with Fourier terms

# Harmonic regression: trend plus K=3 Fourier pairs. With m=7, K=3 covers all
# seasonal harmonics, so this fit is equivalent to the season-dummy model above
# (identical residual SE, R^2 and CV statistics in the output below).
fit.tslmfouriser1 = tslm(daily_train ~ trend + fourier(daily_train, K=3))
summary(fit.tslmfouriser1)

Call:
tslm(formula = daily_train ~ trend + fourier(daily_train, K = 3))

Residuals:
    Min      1Q  Median      3Q     Max 
-36.834  -9.540  -2.207   7.454  90.456 

Coefficients:
                                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)                     33.6628695  0.6602475  50.985  < 2e-16 ***
trend                            0.0059092  0.0005976   9.889  < 2e-16 ***
fourier(daily_train, K = 3)S1-7 13.3219050  0.4665453  28.554  < 2e-16 ***
fourier(daily_train, K = 3)C1-7  6.3226551  0.4668196  13.544  < 2e-16 ***
fourier(daily_train, K = 3)S2-7  2.9797158  0.4666483   6.385 2.14e-10 ***
fourier(daily_train, K = 3)C2-7 -5.0435602  0.4667159 -10.806  < 2e-16 ***
fourier(daily_train, K = 3)S3-7 -1.7778566  0.4667310  -3.809 0.000144 ***
fourier(daily_train, K = 3)C3-7  0.3581564  0.4666335   0.768 0.442860    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 14.43 on 1905 degrees of freedom
Multiple R-squared:  0.4001,    Adjusted R-squared:  0.3979 
F-statistic: 181.5 on 7 and 1905 DF,  p-value: < 2.2e-16
checkresiduals(fit.tslmfouriser1)

    Breusch-Godfrey test for serial correlation of order up to 14

data:  Residuals from Linear regression model
LM test = 820.75, df = 14, p-value < 2.2e-16

CV(fit.tslmfouriser1)
          CV          AIC         AICc          BIC        AdjR2 
2.091685e+02 1.022361e+04 1.022371e+04 1.027362e+04 3.979282e-01 

ETS model

# Automatic ETS selection on the raw scale; chooses ETS(M,A,M), AICc = 23422.99.
fit.ets0 = ets(daily_train) # MAM, 23422.99
fit.ets0 
ETS(M,A,M) 

Call:
 ets(y = daily_train) 

  Smoothing parameters:
    alpha = 0.1955 
    beta  = 3e-04 
    gamma = 2e-04 

  Initial states:
    l = 13.2435 
    b = 0.3654 
    s = 1.016 0.8125 0.7568 0.7335 0.8313 1.3992
           1.4507

  sigma:  0.281

     AIC     AICc      BIC 
23422.82 23422.99 23489.50 
fit.ets1 = ets(daily_train, lambda = 'auto') # (A,N,A)  12437.65
fit.ets1
ETS(A,N,A) 

Call:
 ets(y = daily_train, lambda = "auto") 

  Box-Cox transformation: lambda= 0.204 

  Smoothing parameters:
    alpha = 0.2183 
    gamma = 1e-04 

  Initial states:
    l = 3.6266 
    s = 0.1583 -0.4108 -0.5219 -0.5823 -0.381 0.8425
           0.8952

  sigma:  0.5884

     AIC     AICc      BIC 
12437.53 12437.65 12493.10 
fit.ets2 = ets(daily_train, lambda = 0) # (A,N,A) 9747.518
fit.ets2
ETS(A,N,A) 

Call:
 ets(y = daily_train, lambda = 0) 

  Box-Cox transformation: lambda= 0 

  Smoothing parameters:
    alpha = 0.2092 
    gamma = 1e-04 

  Initial states:
    l = 2.5511 
    s = 0.0854 -0.1963 -0.2504 -0.2819 -0.1841 0.4
           0.4274

  sigma:  0.2913

     AIC     AICc      BIC 
9747.402 9747.518 9802.966 
fit.ets3 = ets(daily_train, lambda = 0, damped = TRUE) # (A,Ad,A), 9767.98
fit.ets3
ETS(A,Ad,A) 

Call:
 ets(y = daily_train, damped = TRUE, lambda = 0) 

  Box-Cox transformation: lambda= 0 

  Smoothing parameters:
    alpha = 0.2313 
    beta  = 0.0021 
    gamma = 1e-04 
    phi   = 0.98 

  Initial states:
    l = 2.3039 
    b = 0.0324 
    s = 0.0829 -0.1921 -0.2555 -0.2696 -0.1934 0.3976
           0.4301

  sigma:  0.2926

     AIC     AICc      BIC 
9767.784 9767.976 9840.018 
# fit.ets3 = ets(diff(daily_train,lag=7), lambda = 0)
# fit.ets3# no model to be fitted in

Holt-Winters model

fit.hw0 = hw(daily_train,h = 28) # "addtive" 23774
fit.hw0$model
Holt-Winters' additive method 

Call:
 hw(y = daily_train, h = 28) 

  Smoothing parameters:
    alpha = 0.2107 
    beta  = 1e-04 
    gamma = 1e-04 

  Initial states:
    l = 8.3114 
    b = 0.0128 
    s = 1.706 -7.5146 -8.9321 -10.8604 -7.2023 15.9125
           16.891

  sigma:  11.3844

     AIC     AICc      BIC 
23774.16 23774.33 23840.84 
fit.hw1 = hw(daily_train, seasonal = "additive",h = 28) # 23774.33 
fit.hw1$model
Holt-Winters' additive method 

Call:
 hw(y = daily_train, h = 28, seasonal = "additive") 

  Smoothing parameters:
    alpha = 0.2107 
    beta  = 1e-04 
    gamma = 1e-04 

  Initial states:
    l = 8.3114 
    b = 0.0128 
    s = 1.706 -7.5146 -8.9321 -10.8604 -7.2023 15.9125
           16.891

  sigma:  11.3844

     AIC     AICc      BIC 
23774.16 23774.33 23840.84 
fit.hw2 = hw(daily_train, seasonal = "mult", h = 28) # 23462.01
fit.hw2$model
Holt-Winters' multiplicative method 

Call:
 hw(y = daily_train, h = 28, seasonal = "mult") 

  Smoothing parameters:
    alpha = 0.134 
    beta  = 1e-04 
    gamma = 1e-04 

  Initial states:
    l = 13.8376 
    b = 0.1209 
    s = 1.0418 0.8169 0.7615 0.7387 0.8279 1.4021
           1.4112

  sigma:  0.2847

     AIC     AICc      BIC 
23461.85 23462.01 23528.53 
fit.hw3 = hw(daily_train, seasonal = "mult", damped = T, h = 28) # 23564.84
fit.hw3$model
Damped Holt-Winters' multiplicative method 

Call:
 hw(y = daily_train, h = 28, seasonal = "mult", damped = T) 

  Smoothing parameters:
    alpha = 0.096 
    beta  = 1e-04 
    gamma = 0.0377 
    phi   = 0.98 

  Initial states:
    l = 12.6478 
    b = 0.2251 
    s = 1.1251 0.8357 0.7925 0.7678 0.7296 1.3731
           1.3762

  sigma:  0.2954

     AIC     AICc      BIC 
23564.65 23564.84 23636.89 
fit.hw4 = hw(daily_train, seasonal = "additive", h = 28, lambda = 'auto') # 12442.36
fit.hw4$model
Holt-Winters' additive method 

Call:
 hw(y = daily_train, h = 28, seasonal = "additive", lambda = "auto") 

  Box-Cox transformation: lambda= 0.204 

  Smoothing parameters:
    alpha = 0.2254 
    beta  = 1e-04 
    gamma = 1e-04 

  Initial states:
    l = 3.3738 
    b = 8e-04 
    s = 0.1533 -0.4186 -0.5203 -0.5936 -0.3868 0.8573
           0.9087

  sigma:  0.5888

     AIC     AICc      BIC 
12442.19 12442.36 12508.87 
fit.hw5 = hw(daily_train, seasonal = "additive", h = 28, lambda = 0 ) # 9759.407 AAA
fit.hw5$model
Holt-Winters' additive method 

Call:
 hw(y = daily_train, h = 28, seasonal = "additive", lambda = 0) 

  Box-Cox transformation: lambda= 0 

  Smoothing parameters:
    alpha = 0.2053 
    beta  = 1e-04 
    gamma = 0.0025 

  Initial states:
    l = 2.5897 
    b = 0.0017 
    s = 0.0841 -0.2075 -0.2421 -0.2643 -0.19 0.3788
           0.441

  sigma:  0.292

     AIC     AICc      BIC 
9759.243 9759.407 9825.920 
# fit.hw6 = hw(daily_train, seasonal = "mult", h = 28, lambda = 0 ) # forbidden combination of mult and lambda
# fit.hw6$model 
# 28-day forecast from the log-transformed ETS(A,N,A) model with its fitted
# values overlaid on the training window.
fit.ets2 %>% forecast(h=28) %>%
  autoplot() +
  ylab("Unit sales") +
  autolayer(fitted(fit.ets2), series = 'ETS(A, N, A)')

# Same forecast, zoomed to the end of the series (weeks 275-280).
fit.ets2 %>% forecast(h=28) %>%
  autoplot() +
  ylab("Unit sales") + 
  xlim(275, 280)
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.

# Compare ETS(A,N,A) and additive-HW-with-log forecasts against the test set
# over the forecast horizon (weeks 275-279).
fit.ets2 %>% forecast(h=28) %>%
  autoplot(PI = F, series = 'ETS(A, N, A)') +
  ylab("Unit sales") + 
  xlim(275, 279) +
  autolayer(daily_test) +
  autolayer(fit.hw5, series = "additive + log", PI = F)
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.

# Training observations with hw5 fitted values and its forward forecast.
autoplot(daily_train, series = 'observations') +
  autolayer(fit.hw5, PI = F) +
  autolayer(fitted(fit.hw5),series = "additive + log")+
  ggtitle('Observations and predictions from additive HW model with log transformation')

# Test-window comparison for the selected additive HW model (log scale).
# Fixed typo "Predited" and the missing closing parenthesis in the title.
autoplot(daily_test, series = 'Observations') +
  autolayer(fit.hw5, PI = F, series = 'Additive HW model') + 
  ylab("Predicted unit sales") +
  xlab("Week") + 
  ggtitle("Predicted daily unit sales (additive HW model with log transformation)")

# Residual diagnostics: the Ljung-Box output below rejects white-noise residuals.
checkresiduals(fit.hw5)

    Ljung-Box test

data:  Residuals from Holt-Winters' additive method
Q* = 84.376, df = 3, p-value < 2.2e-16

Model df: 11.   Total lags used: 14

accuracy(fit.hw5, daily_test) # additive + log
                   ME      RMSE      MAE       MPE     MAPE      MASE      ACF1 Theil's U
Training set 0.862237 11.370557 8.413686 -4.984152 23.81488 0.7249112 0.2060711        NA
Test set     2.234848  9.688775 7.214511 -1.433060 21.13814 0.6215919 0.1248852 0.5210131

Arima

summary(ur.kpss(daily_train))  # not stationary

####################### 
# KPSS Unit Root Test # 
####################### 

Test is of type: mu with 8 lags. 

Value of test-statistic is: 3.0088 

Critical value for a significance level of: 
                10pct  5pct 2.5pct  1pct
critical values 0.347 0.463  0.574 0.739
summary(ur.kpss(diff(daily_train))) # stationary

####################### 
# KPSS Unit Root Test # 
####################### 

Test is of type: mu with 8 lags. 

Value of test-statistic is: 0.003 

Critical value for a significance level of: 
                10pct  5pct 2.5pct  1pct
critical values 0.347 0.463  0.574 0.739
summary(ur.kpss(diff(daily_train, lag=7))) # stationary

####################### 
# KPSS Unit Root Test # 
####################### 

Test is of type: mu with 8 lags. 

Value of test-statistic is: 0.0233 

Critical value for a significance level of: 
                10pct  5pct 2.5pct  1pct
critical values 0.347 0.463  0.574 0.739
# Recompute the Box-Cox lambda (same value as earlier, ~0.204).
lambda = BoxCox.lambda(daily_train) 
autoplot(diff(daily_train, lag=7)) # seasonally differenced data

# ACF/PACF displays: raw series vs lag-7 (seasonally) differenced series,
# used to pick the non-seasonal and seasonal ARIMA orders below.
daily_train %>% ggtsdisplay() 

daily_train %>%  diff(lag=7) %>% ggtsdisplay()

Choose ARIMA models manually

# Starting candidate ARIMA(0,0,1)(0,1,1)[7]; inspect residual ACF/PACF to see
# which orders still need enlarging.
daily_train %>% 
  Arima(c(0,0,1), seasonal = c(0,1,1)) %>% 
  residuals() %>% ggtsdisplay()

introduce variation

(daily_train %>% 
  Arima(c(0,0,1), seasonal = c(0,1,1))) # 14786.4
Series: . 
ARIMA(0,0,1)(0,1,1)[7] 

Coefficients:
         ma1     sma1
      0.3360  -0.7807
s.e.  0.0191   0.0161

sigma^2 estimated as 136.2:  log likelihood=-7390.19
AIC=14786.38   AICc=14786.4   BIC=14803.04
(daily_train %>% 
  Arima(c(1,0,1), seasonal = c(0,1,1))) # 14663.87
Series: . 
ARIMA(1,0,1)(0,1,1)[7] 

Coefficients:
         ar1      ma1     sma1
      0.7673  -0.4165  -0.8652
s.e.  0.0512   0.0736   0.0200

sigma^2 estimated as 127.5:  log likelihood=-7327.92
AIC=14663.84   AICc=14663.87   BIC=14686.06
(daily_train %>% 
  Arima(c(2,0,1), seasonal = c(0,1,1))) # 14577.05
Series: . 
ARIMA(2,0,1)(0,1,1)[7] 

Coefficients:
         ar1      ar2      ma1     sma1
      1.2973  -0.3042  -0.9107  -0.9869
s.e.  0.0279   0.0265   0.0141   0.0103

sigma^2 estimated as 120.9:  log likelihood=-7283.51
AIC=14577.02   AICc=14577.05   BIC=14604.78
(daily_train %>% 
  Arima(c(0,0,1), seasonal = c(0,1,2))) # 14786.22
Series: . 
ARIMA(0,0,1)(0,1,2)[7] 

Coefficients:
         ma1     sma1     sma2
      0.3357  -0.7555  -0.0351
s.e.  0.0190   0.0231   0.0238

sigma^2 estimated as 136.1:  log likelihood=-7389.1
AIC=14786.2   AICc=14786.22   BIC=14808.41
(daily_train %>% 
  Arima(c(1,0,1), seasonal = c(0,1,2))) # 14662.2
Series: . 
ARIMA(1,0,1)(0,1,2)[7] 

Coefficients:
         ar1      ma1     sma1     sma2
      0.7720  -0.4165  -0.8365  -0.0510
s.e.  0.0515   0.0722   0.0260   0.0275

sigma^2 estimated as 127.2:  log likelihood=-7326.08
AIC=14662.17   AICc=14662.2   BIC=14689.93
(daily_train %>% 
  Arima(c(2,0,1), seasonal = c(0,1,2))) # 14568.07
Series: . 
ARIMA(2,0,1)(0,1,2)[7] 

Coefficients:
         ar1      ar2      ma1     sma1     sma2
      1.3082  -0.3138  -0.9208  -0.9170  -0.0765
s.e.  0.0271   0.0259   0.0132   0.0237   0.0231

sigma^2 estimated as 120:  log likelihood=-7278.01
AIC=14568.03   AICc=14568.07   BIC=14601.34
(daily_train %>% 
  Arima(c(2,0,1), seasonal = c(1,1,2))) # 14556.24 *****
Series: . 
ARIMA(2,0,1)(1,1,2)[7] 

Coefficients:
         ar1      ar2      ma1    sar1     sma1    sma2
      1.3023  -0.3105  -0.9186  0.8309  -1.7596  0.7596
s.e.  0.0293   0.0273   0.0163  0.0518   0.0591  0.0591

sigma^2 estimated as 118.8:  log likelihood=-7271.09
AIC=14556.18   AICc=14556.24   BIC=14595.05
(daily_train %>% 
  Arima(c(2,0,1), seasonal = c(2,1,2))) # 14559.84
Series: . 
ARIMA(2,0,1)(2,1,2)[7] 

Coefficients:
         ar1      ar2      ma1    sar1     sar2     sma1    sma2
      1.3077  -0.3146  -0.9214  0.7952  -0.0088  -1.7277  0.7285
s.e.  0.0416   0.0379   0.0197  0.0562   0.0265   0.0537  0.0524

sigma^2 estimated as 119.2:  log likelihood=-7271.88
AIC=14559.76   AICc=14559.84   BIC=14604.18

manually selected ARIMA

daily_train %>% 
  Arima(c(2,0,1), seasonal = c(1,1,2)) %>% 
  residuals() %>% ggtsdisplay()

(fit.arima1 <- daily_train %>% 
  Arima(c(2,0,1), seasonal = c(1,1,2)))
Series: . 
ARIMA(2,0,1)(1,1,2)[7] 

Coefficients:
         ar1      ar2      ma1    sar1     sma1    sma2
      1.3023  -0.3105  -0.9186  0.8309  -1.7596  0.7596
s.e.  0.0293   0.0273   0.0163  0.0518   0.0591  0.0591

sigma^2 estimated as 118.8:  log likelihood=-7271.09
AIC=14556.18   AICc=14556.24   BIC=14595.05
checkresiduals(fit.arima1)

    Ljung-Box test

data:  Residuals from ARIMA(2,0,1)(1,1,2)[7]
Q* = 13.343, df = 8, p-value = 0.1006

Model df: 6.   Total lags used: 14

autoplot(daily_test, series = 'Observations') +
  autolayer(forecast(fit.arima1, h=28), series = 'ARIMA', PI=F) + 
  ggtitle('Predicted daily unit sales (ARIMA(2,0,1)(1,1,2)[7])') +
  ylab('Unit sales') + 
  xlab('Week')

autoplot(daily_test, series = 'Observations') +
  autolayer(fitted(fit.arima1)) +
  autolayer(forecast(fit.arima1, h=28), series = 'ARIMA', PI=F) +
  ggtitle('Daily unit sales (ARIMA(2,0,1)(1,1,2)[7])') +
  ylab('Unit sales') + 
  xlab('Week')

auto.arima suggests ARIMA(1,0,1)(0,1,2)[7] model

fit.arimalog = auto.arima(daily_train)
checkresiduals(fit.arimalog)

    Ljung-Box test

data:  Residuals from ARIMA(1,0,1)(0,1,2)[7] with drift
Q* = 53.291, df = 9, p-value = 2.574e-08

Model df: 5.   Total lags used: 14

autoplot(daily_test, series = 'Observations') +
  autolayer(forecast(fit.arimalog, h=28), PI=F, series = 'Auto arima') +
  xlab('Week') +
  ylab('Unit sales') + 
  ggtitle('Predicted daily unit sales')

dynamic regression

plots <- list()
for (i in seq(3)) {
  fit <- auto.arima(daily_train, xreg = fourier(daily_train, K = i),
    seasonal = FALSE, lambda = 0)
  plots[[i]] <- autoplot(forecast(fit,
      xreg=fourier(daily_train, K=i, h=28))) +
    xlim(250,280) +
    xlab(paste("K=",i,"   AICC=",round(fit[["aicc"]],2))) +
    ylab("unit sales")
}
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
gridExtra::grid.arrange(
  plots[[1]],plots[[2]],plots[[3]],nrow=3)

checkresiduals(fit.dynamic3)

    Ljung-Box test

data:  Residuals from Regression with ARIMA(2,1,1) errors
Q* = 5.3023, df = 5, p-value = 0.3801

Model df: 9.   Total lags used: 14

(fit.dynamic3 <- auto.arima(daily_train, xreg = fourier(daily_train, K = 3),
    seasonal = FALSE, lambda = 0))
Series: daily_train 
Regression with ARIMA(2,1,1) errors 
Box Cox transformation: lambda= 0 

Coefficients:
         ar1     ar2      ma1    S1-7    C1-7    S2-7     C2-7     S3-7    C3-7
      0.2573  0.0446  -0.9155  0.3332  0.1755  0.0702  -0.1117  -0.0405  0.0213
s.e.  0.0265  0.0256   0.0130  0.0100  0.0100  0.0079   0.0079   0.0073  0.0073

sigma^2 estimated as 0.08135:  log likelihood=-310.5
AIC=640.99   AICc=641.11   BIC=696.55
autoplot(daily_test, series = 'Observations') +
  autolayer(forecast(fit.dynamic3,
      xreg=fourier(daily_train, K=i, h=28)), PI = F, series = 'Dynamic regression') +
    ylab("unit sales") +
  xlab("Week") +
  ggtitle('Predicted unit sales (dynamic regression with ARIMA(2,1,1) errors (K=3))')

NA

Comparison

Accuracy

print("Linear regression")
[1] "Linear regression"
accuracy(ts(predict.lm(fit.lm1, food_test), frequency = 7, start = c(275,2)), daily_test)
                 ME    RMSE      MAE       MPE     MAPE       ACF1 Theil's U
Test set -0.9138804 9.60387 7.269412 -11.12887 23.62771 -0.3169598 0.5655129
print("Additice Holt-Winters' model with log transformation")
[1] "Additice Holt-Winters' model with log transformation"
accuracy(fit.hw5, daily_test)
                   ME      RMSE      MAE       MPE     MAPE      MASE      ACF1 Theil's U
Training set 0.862237 11.370557 8.413686 -4.984152 23.81488 0.7249112 0.2060711        NA
Test set     2.234848  9.688775 7.214511 -1.433060 21.13814 0.6215919 0.1248852 0.5210131
print("Manually optimozed ARIMA(2,0,1)(1,1,2)[7]")
[1] "Manually optimozed ARIMA(2,0,1)(1,1,2)[7]"
accuracy(forecast(fit.arima1, h=28), daily_test)
                   ME      RMSE      MAE       MPE     MAPE      MASE         ACF1 Theil's U
Training set 0.511315 10.864516 8.185594 -6.383054 23.68489 0.7052591 -0.004813012        NA
Test set     1.661816  9.539802 7.227151 -3.130000 21.54173 0.6226810  0.133261719 0.5247036
print("Dynamic regression with ARIMA(2,1,1) errors (K=3)")
[1] "Dynamic regression with ARIMA(2,1,1) errors (K=3)"
accuracy(forecast(fit.dynamic3,
      xreg=fourier(daily_train, K=3, h=28)), daily_test)
                   ME      RMSE      MAE         MPE     MAPE      MASE       ACF1 Theil's U
Training set 1.270446 10.903569 8.168515 -3.81455307 23.05112 0.7037876 0.06327517        NA
Test set     2.725233  9.698695 7.226867 -0.02355306 21.07680 0.6226566 0.12913990 0.5179121

Prediction curves

autoplot(daily_test, series = 'Observations') + 
  autolayer(ts(predict.lm(fit.lm1, food_test), frequency = 7, start = c(275,2) ), series = 'Linear regression') +
  autolayer(fit.hw5, PI = F, series = 'Holt-Winters model (log)') + 
  autolayer(forecast(fit.arima1, h=28), series = 'ARIMA', PI=F) +
  autolayer(forecast(fit.dynamic3, xreg=fourier(daily_train, K=i, h=28)), PI = F, series = 'Dynamic regression') +
  ylab("Predited unit sales") +
  xlab("Week") +
  ggtitle("Predicted daily unit sales")

NA
---
title: "Walmart Forecasting"
output:
  html_notebook: default
  html_document:
    df_print: paged
  pdf_document: default
---

loading required libraries. 
```{r}
library(tidyverse)
library(ggplot2)
library(forecast)
library(fpp2)
library(fma)
library(lmtest)
library(dbplyr)
library(lubridate)
library(seasonal)
library(urca)
library(tseries)
library(gridExtra)
```

```{r}
# Read the raw calendar-factors dataset and extract the sales series
# as a plain (unnamed) numeric vector.
food <- read_csv('../data/calendar_factors.csv')
food.252 <- food[['sales']]
```

# Exploratory Data Analysis (EDA)
```{r}
# Annual seasonality (m = 365): build the full series, then split it into
# a 1913-day training window and a 28-day test window.
foods_sale <- ts(food.252, start = c(2011, 29), end = c(2016, 144), frequency = 365)  # 1941 days in total
foods_train <- window(foods_sale, start = c(2011, 29), end = c(2016, 116))  # days 1-1913
foods_test <- window(foods_sale, start = c(2016, 117), end = c(2016, 144))  # days 1914-1941
```


```{r}
# Weekly seasonality (m = 7): the same observations, indexed in weeks.
daily_ts <- ts(food.252, start = c(1, 7), end = c(279, 1), frequency = 7)  # obs 1-1941: 2011-01-29 ~ 2016-05-22
daily_train <- subset(daily_ts, end = 1913)    # training set: obs 1-1913, 2011-01-29 ~ 2016-04-25
daily_test <- subset(daily_ts, start = 1914)   # test set: obs 1914-1941, 2016-04-26 ~ 2016-05-22
```


```{r}
# m = 365 views of the training data: raw series, both decompositions,
# and the distribution of daily sales.
# NOTE(review): the original title read "01/19/2011 - 05/12/2016", which does
# not match the training window (2011-01-29 ~ 2016-04-25) documented at the
# train/test split — corrected here; confirm the intended range.
autoplot(foods_train) +
  ggtitle("Daily unit sales (01/29/2011 - 04/25/2016)") +
  ylab("Figure.1 Daily unit sales") +
  xlab("Year")
autoplot(decompose(foods_train, type = 'additive')) +
  ggtitle("Figure.2 Additive decomposition") +  # fixed typo "decomposistion"
  xlab("Year") + ylab("Component")  # additive decomposition
autoplot(decompose(foods_train, type = 'mult')) +
  ggtitle("Multiplicative decomposition") +
  xlab("Year") + ylab("Component")  # multiplicative decomposition
hist(foods_train, main = 'Distribution of daily unit sales', xlab = 'Daily unit sale')
```


```{r}
# m = 7 views of the training data: time plot, decompositions, histogram,
# seasonally differenced series, and the day-of-week subseries plot.
autoplot(daily_train) + ggtitle("Unit sale time plot")
autoplot(decompose(daily_train, type = 'additive'))  # additive decomposition
autoplot(decompose(daily_train, type = 'mult'))      # multiplicative decomposition
hist(daily_train)
autoplot(diff(daily_train, lag = 7))                 # lag-7 (seasonal) difference
ggsubseriesplot(daily_train) + ggtitle("Unit sales during a week")  # sales are higher on weekends
```

```{r}
summary(foods_train)
```


```{r}
# m=365: stationarity and autocorrelation diagnostics on the training series
print(Acf(foods_train))  # non-stationary: r1 is large and positive, and the ACF decays to zero only slowly
summary(ur.kpss(foods_train)) # KPSS statistic 3.01 >> 1pct critical value 0.74: the level series is not stationary
summary(ur.kpss(diff(foods_train))) # after one first difference the KPSS test no longer rejects: stationary
kpss.test(foods_train) # p-value 0.01: reject level stationarity
adf.test(foods_train) # p-value 0.01: reject a unit root (note ADF and KPSS frame stationarity oppositely)
Box.test(foods_train) # tiny p-value: reject the null of no autocorrelation (Box-Pierce)
Box.test(foods_train,type='Lj') # Ljung-Box version; same conclusion as Box-Pierce
BoxCox.lambda(foods_train) # suggested Box-Cox lambda ~ 0.31
# m=7: Box-Cox lambda for the weekly-frequency series
BoxCox.lambda(daily_ts) # ~ 0.204
BoxCox.lambda(daily_train) # ~ 0.204
```

```{r}
# Number of differences suggested by unit-root tests at each frequency.
nsdiffs(foods_train) # 0: no seasonal differencing required at m = 365
ndiffs(foods_train) # 1: ndiffs() runs a sequence of KPSS tests; as seen above, one first difference makes the sales data stationary

nsdiffs(daily_train) # 1: one seasonal difference needed at m = 7
ndiffs(diff(daily_train, lag = 7)) # 0: no further ordinary differencing after the seasonal difference
```

# Linear regression model
```{r}
# Row-index split of the raw data frame, mirroring the ts train/test split
# (rows 1-1913 train, 1914-1941 test); print the training frame for a look.
food_train_df <- food[1:1913, ]
food_test_df <- food[1914:1941, ]
food_train_df
```

```{r}
round(cor(food_train_df[, c(5,6,7,9,10,11,12)]),2)
```
## lm()
```{r}
# Linear regression of sales on its own lag plus calendar covariates.
# NOTE(review): with tidyverse attached, `lag(sales)` resolves to dplyr::lag
# (previous observation) rather than stats::lag — confirm this is intended;
# the first row is dropped as NA. `cultrural_days` is the column's actual
# (misspelled) name in the data.
fit.lm1 = lm(formula = sales ~ lag(sales) + as.factor(wday) + as.factor(month)+ as.factor(year) + sporting_days + cultrural_days + national_days + religious_days, data = food_train_df)
summary(fit.lm1)          # coefficient table and fit statistics
checkresiduals(fit.lm1)   # residual ACF + Breusch-Godfrey test
CV(fit.lm1)               # cross-validation statistic, AIC/AICc/BIC, adj. R^2
```
```{r}
# Overlay the linear model's in-sample predictions on the training series.
lm_fitted <- ts(predict.lm(fit.lm1), frequency = 7)
autoplot(daily_train, series = 'Observations') +
  autolayer(lm_fitted, series = 'Linear regression') +
  ylab("Unit sales") +
  xlab("Week") +
  ggtitle("Daily unit sales (linear regression model)")

```
```{r}
# Out-of-sample predictions from the linear model against the test window
# (aligned to the test set's start, week 275 day 2).
lm_pred <- ts(predict.lm(fit.lm1, food_test_df), frequency = 7, start = c(275, 2))
autoplot(daily_test, series = 'Observations') +
  autolayer(lm_pred, series = 'Linear regression') +
  ylab("Unit sales") +
  xlab("Week") +
  ggtitle("Predicted daily unit sales (linear regression model)")
```

##  tslm()
```{r}
# Seasonal-dummy regression via tslm().
# NOTE(review): `lambda` is computed here but never passed to tslm() — kept
# for reference only; confirm whether a transformation was intended.
lambda = BoxCox.lambda(daily_train)
fit.tslm1 = tslm(daily_train ~ trend + season)
summary(fit.tslm1)
checkresiduals(fit.tslm1)
CV(fit.tslm1) # 1.022361e+04

# Zoom on weeks 50-100 to compare fitted values with observations.
autoplot(daily_train, series = 'Observations') +
  autolayer(fitted(fit.tslm1), series = 'fitted') +
  xlim(50, 100)

# 28-day forecast against the test window.
autoplot(daily_test, series = 'Observations') +
  autolayer(forecast(fit.tslm1, h=28), PI = F, series = 'linear regression model') +
  ylab("Predicted unit sales") +  # fixed typo "Predited"
  xlab("Week") +
  ggtitle("Predicted daily unit sales (linear regression model)")  # fixed typo "regressiom"
```

## tslm with fourier terms
```{r}
# Harmonic regression: trend plus K = 3 Fourier pairs for the weekly season,
# an alternative to one dummy per day of week.
fit.tslmfouriser1 = tslm(daily_train ~ trend + fourier(daily_train, K=3))
summary(fit.tslmfouriser1)
checkresiduals(fit.tslmfouriser1)
CV(fit.tslmfouriser1)
```

# ETS model
```{r}
# Automatic ETS model selection under different transformations;
# trailing comments record the selected model and its AIC.
fit.ets0 = ets(daily_train) # ETS(M,A,M) selected, AIC 23422.99
fit.ets0 
fit.ets1 = ets(daily_train, lambda = 'auto') # ETS(A,N,A) with automatic Box-Cox, AIC 12437.65
fit.ets1
fit.ets2 = ets(daily_train, lambda = 0) # ETS(A,N,A) on the log scale, AIC 9747.518
fit.ets2
fit.ets3 = ets(daily_train, lambda = 0, damped = TRUE) # ETS(A,Ad,A), AIC 9767.98
fit.ets3
# fit.ets3 = ets(diff(daily_train,lag=7), lambda = 0)
# fit.ets3 # kept for reference: ets() could not fit the seasonally differenced series
```

# Holt-Winters model
```{r}
# Holt-Winters variants; the trailing comment on each fit records its AIC.
fit.hw0 = hw(daily_train,h = 28) # additive seasonality (default), AIC 23774
fit.hw0$model
fit.hw1 = hw(daily_train, seasonal = "additive",h = 28) # explicit additive, 23774.33 
fit.hw1$model
fit.hw2 = hw(daily_train, seasonal = "mult", h = 28) # multiplicative, 23462.01
fit.hw2$model
fit.hw3 = hw(daily_train, seasonal = "mult", damped = T, h = 28) # damped multiplicative, 23564.84
fit.hw3$model
fit.hw4 = hw(daily_train, seasonal = "additive", h = 28, lambda = 'auto') # auto Box-Cox, 12442.36
fit.hw4$model
fit.hw5 = hw(daily_train, seasonal = "additive", h = 28, lambda = 0 ) # log transform; best AIC 9759.407, ETS(A,A,A)
fit.hw5$model
# fit.hw6 = hw(daily_train, seasonal = "mult", h = 28, lambda = 0 ) # multiplicative seasonality + Box-Cox is a forbidden combination
# fit.hw6$model 
```

```{r}
# Forecast from the log-scale ETS model with its in-sample fit overlaid.
ets2_fc <- forecast(fit.ets2, h = 28)
autoplot(ets2_fc) +
  ylab("Unit sales") +
  autolayer(fitted(fit.ets2), series = 'ETS(A, N, A)')
```

```{r}
# Same forecast, zoomed to the forecast horizon (weeks 275-280).
ets2_fc_zoom <- forecast(fit.ets2, h = 28)
autoplot(ets2_fc_zoom) +
  ylab("Unit sales") +
  xlim(275, 280)
```

```{r}
# Compare ETS(A,N,A) against the additive HW (log) forecasts over the
# test window (point forecasts only, no intervals).
ets2_fc_cmp <- forecast(fit.ets2, h = 28)
autoplot(ets2_fc_cmp, PI = F, series = 'ETS(A, N, A)') +
  ylab("Unit sales") +
  xlim(275, 279) +
  autolayer(daily_test) +
  autolayer(fit.hw5, series = "additive + log", PI = F)
```

```{r}
# Full training series with the HW (additive, log) fitted values and its
# 28-day forecast layered on top.
autoplot(daily_train, series = 'observations') +
  autolayer(fit.hw5, PI = F) +
  autolayer(fitted(fit.hw5),series = "additive + log")+
  ggtitle('Observations and predictions from additive HW model with log transformation')
```

```{r}
# HW (additive, log) forecast against the test observations.
autoplot(daily_test, series = 'Observations') +
  autolayer(fit.hw5, PI = F, series = 'Additive HW model') +
  ylab("Predicted unit sales") +  # fixed typo "Predited"
  xlab("Week") +
  # added the missing closing parenthesis in the title
  ggtitle("Predicted daily unit sales (additive HW model with log transformation)")
```

```{r}
checkresiduals(fit.hw5)
```

```{r}
accuracy(fit.hw5, daily_test) # additive + log
```

# Arima
```{r}
# Stationarity checks at weekly frequency before choosing ARIMA orders.
summary(ur.kpss(daily_train))  # level series: not stationary
summary(ur.kpss(diff(daily_train))) # first difference: stationary
summary(ur.kpss(diff(daily_train, lag=7))) # seasonal (lag-7) difference: stationary
lambda = BoxCox.lambda(daily_train) # ~0.204; recomputed here but not passed to the Arima calls below
```

```{r}
# Inspect the raw and seasonally differenced series with ACF/PACF displays.
autoplot(diff(daily_train, lag=7)) # seasonally differenced data
ggtsdisplay(daily_train)
ggtsdisplay(diff(daily_train, lag = 7))
```
## choose ARIMA models manually
```{r}
# Residual diagnostics for a first candidate: ARIMA(0,0,1)(0,1,1)[7].
candidate0 <- Arima(daily_train, c(0,0,1), seasonal = c(0,1,1))
ggtsdisplay(residuals(candidate0))
```
# introduce variation
```{r}
# Manual search over (p,0,q)(P,1,Q)[7] candidates; the trailing comment on
# each call records its AICc. ARIMA(2,0,1)(1,1,2)[7] (marked *****) has the
# lowest AICc and is carried forward.
(daily_train %>% 
  Arima(c(0,0,1), seasonal = c(0,1,1))) # 14786.4
(daily_train %>% 
  Arima(c(1,0,1), seasonal = c(0,1,1))) # 14663.87
(daily_train %>% 
  Arima(c(2,0,1), seasonal = c(0,1,1))) # 14577.05
(daily_train %>% 
  Arima(c(0,0,1), seasonal = c(0,1,2))) # 14786.22
(daily_train %>% 
  Arima(c(1,0,1), seasonal = c(0,1,2))) # 14662.2
(daily_train %>% 
  Arima(c(2,0,1), seasonal = c(0,1,2))) # 14568.07
(daily_train %>% 
  Arima(c(2,0,1), seasonal = c(1,1,2))) # 14556.24 *****
(daily_train %>% 
  Arima(c(2,0,1), seasonal = c(2,1,2))) # 14559.84
```
## manually selected ARIMA
```{r}
# Fit the manually selected ARIMA(2,0,1)(1,1,2)[7] once, then reuse it for
# the residual display, the printed summary, and the residual diagnostics.
fit.arima1 <- Arima(daily_train, c(2,0,1), seasonal = c(1,1,2))
ggtsdisplay(residuals(fit.arima1))
print(fit.arima1)
checkresiduals(fit.arima1)
```

```{r}
# 28-day ARIMA forecast (point forecasts only) over the test window.
arima1_fc <- forecast(fit.arima1, h = 28)
autoplot(daily_test, series = 'Observations') +
  autolayer(arima1_fc, series = 'ARIMA', PI = F) +
  ggtitle('Predicted daily unit sales (ARIMA(2,0,1)(1,1,2)[7])') +
  ylab('Unit sales') +
  xlab('Week')
```

```{r}
# Same forecast with the in-sample fitted values layered in as well.
arima1_fc2 <- forecast(fit.arima1, h = 28)
autoplot(daily_test, series = 'Observations') +
  autolayer(fitted(fit.arima1)) +
  autolayer(arima1_fc2, series = 'ARIMA', PI = F) +
  ggtitle('Daily unit sales (ARIMA(2,0,1)(1,1,2)[7])') +
  ylab('Unit sales') +
  xlab('Week')
```
## auto.arima suggests ARIMA(1,0,1)(0,1,2)[7] model
```{r}
# auto.arima selects ARIMA(1,0,1)(0,1,2)[7] with drift; per the transcript,
# its Ljung-Box p-value is ~2.6e-08, so its residuals are NOT white noise
# (worse than the manually selected model).
# NOTE(review): despite the name, `fit.arimalog` is fit WITHOUT a log
# transform (no `lambda` argument) — confirm the intended model.
fit.arimalog = auto.arima(daily_train)
checkresiduals(fit.arimalog)
autoplot(daily_test, series = 'Observations') +
  autolayer(forecast(fit.arimalog, h=28), PI=F, series = 'Auto arima') +
  xlab('Week') +
  ylab('Unit sales') + 
  ggtitle('Predicted daily unit sales')
```


```{r}
# arimalog <- forecast(fit.arimalog,h=28)
# accuracy(InvBoxCox(as.numeric(arimalog$mean),0),daily_test)
# autoplot(daily_test) +
#   autolayer(ts(InvBoxCox(as.numeric(arimalog$mean)),0))
# autoplot(InvBoxCox(as.numeric(arimalog$mean),0))
# start(daily_test)
# 
# ts_df <-ts(InvBoxCox(as.numeric(arimalog$mean),0), start=c(275,2), frequency = 7)
# autoplot(daily_test) + 
#   autolayer(ts_df, series = 'arimalog') + 
#   autolayer(fit.arima0, series = 'autoarima')
# daily_ts = ts(food.252, start=c(1,7), end = c(279,1), frequency = 7) 
# 
# daily_train %>% 
#  auto.arima() %>% 
#   residuals() %>% ggtsdisplay()
# (fit.arima0 <- daily_train %>% 
#   auto.arima())
# checkresiduals(fit.arima0)
# 
# forecast(fit.arima0, h=28)
# 
# autoplot(daily_test) +
#   autolayer(forecast(fit.arimaauto, h=28), PI = F, series = "Auto") + 
#   autolayer(ts_df, series = 'arimalog')
# 
# accuracy(forecast(fit.arimaauto, h=28), daily_test) # accuracy for auto arima
# accuracy(ts_df, daily_test) # accuracy for manually optimzed arima
```

# dynamic regression
```{r}
# Compare harmonic dynamic regressions with K = 1..3 Fourier pairs; for each
# K, auto.arima chooses the ARIMA error structure (log scale via lambda = 0)
# and the 28-day forecast is plotted with its AICc in the x-axis label.
# WARNING(review): the loop index `i` leaks into the global environment
# (i == 3 afterwards) and later chunks rely on it via `fourier(..., K = i)`
# — do not rename `i` without fixing those chunks.
plots <- list()
for (i in seq(3)) {
  fit <- auto.arima(daily_train, xreg = fourier(daily_train, K = i),
    seasonal = FALSE, lambda = 0)
  plots[[i]] <- autoplot(forecast(fit,
      xreg=fourier(daily_train, K=i, h=28))) +
    xlim(250,280) +
    xlab(paste("K=",i,"   AICC=",round(fit[["aicc"]],2))) +
    ylab("unit sales")
}
gridExtra::grid.arrange(
  plots[[1]],plots[[2]],plots[[3]],nrow=3)
```
```{r}
checkresiduals(fit.dynamic3)
```


```{r}
# Fit the preferred dynamic regression (K = 3 Fourier pairs, log scale) and
# plot its 28-day forecast against the test window.
(fit.dynamic3 <- auto.arima(daily_train, xreg = fourier(daily_train, K = 3),
    seasonal = FALSE, lambda = 0))
autoplot(daily_test, series = 'Observations') +
  autolayer(forecast(fit.dynamic3,
      # BUG FIX: was K = i, silently reusing the loop index leaked from the
      # K-selection chunk; pin to K = 3 to match the fitted regressors.
      xreg = fourier(daily_train, K = 3, h = 28)),
    PI = F, series = 'Dynamic regression') +
  ylab("unit sales") +
  xlab("Week") +
  ggtitle('Predicted unit sales (dynamic regression with ARIMA(2,1,1) errors (K=3))')
  
```
# Comparison
## Accuracy
```{r}
# Test-set accuracy for each candidate model, printed with a label.
print("Linear regression")
# BUG FIX: was `food_test`, which is never defined in this file — the test
# split of the data frame is `food_test_df` (predict.lm needs a data frame).
accuracy(ts(predict.lm(fit.lm1, food_test_df), frequency = 7, start = c(275,2)), daily_test)
print("Additive Holt-Winters' model with log transformation")  # fixed typo "Additice"
accuracy(fit.hw5, daily_test)
print("Manually optimized ARIMA(2,0,1)(1,1,2)[7]")  # fixed typo "optimozed"
accuracy(forecast(fit.arima1, h=28), daily_test)
print("Dynamic regression with ARIMA(2,1,1) errors (K=3)")
accuracy(forecast(fit.dynamic3,
      xreg=fourier(daily_train, K=3, h=28)), daily_test)
```

## Prediction curves
```{r}
# Overlay every model's test-window predictions on the observed series.
autoplot(daily_test, series = 'Observations') +
  # BUG FIX: was `food_test` (undefined); the test data frame is `food_test_df`.
  autolayer(ts(predict.lm(fit.lm1, food_test_df), frequency = 7, start = c(275,2)),
            series = 'Linear regression') +
  autolayer(fit.hw5, PI = F, series = 'Holt-Winters model (log)') +
  autolayer(forecast(fit.arima1, h=28), series = 'ARIMA', PI=F) +
  # BUG FIX: was K = i (loop index leaked from the K-selection chunk); pin to 3.
  autolayer(forecast(fit.dynamic3, xreg=fourier(daily_train, K=3, h=28)),
            PI = F, series = 'Dynamic regression') +
  ylab("Predicted unit sales") +  # fixed typo "Predited"
  xlab("Week") +
  ggtitle("Predicted daily unit sales")
```

